library(gprofiler2)
library(viridis)
library(stringr)
library(signatureSearch)
library(ggplot2)
library(dplyr)
library(here)
library(viridis)

Custom functions for FEA

Enrichment function for up and down genes

enrich <- function(upgenes, downgenes, topreturned) {#, filepath) {
  up_fea <- gost(upgenes, multi_query = FALSE, correction_method = "bonferroni", evcodes = TRUE)
  #remove abitrary pathways -- any pathways with > 1000 genes
    up_fea <- up_fea$result %>% dplyr::filter(., term_size < 1000 & term_size > 5) %>% dplyr::mutate(., .keep = "all", direction = "upregulated")
    up_mat <- up_fea  %>% dplyr::arrange(., desc(intersection_size)) %>% dplyr::slice_head(., n = topreturned)
  
  down_fea <- gost(downgenes, multi_query = FALSE, correction_method = "bonferroni", evcodes = TRUE)
  #remove abitrary pathways -- any pathways with > 1000 genes
  down_fea <- down_fea$result %>% dplyr::filter(., term_size < 1000 & term_size > 5) %>% dplyr::mutate(., .keep = "all", direction = "downregulated") 
  down_mat <- down_fea  %>% dplyr::arrange(., desc(intersection_size)) %>% dplyr::slice_head(., n = topreturned)
  combined_fea <- rbind(up_mat, down_mat)
  
  fea_res <- list(downregulated = down_fea, upregulated = up_fea, topcompared = combined_fea)

return(fea_res)
}

MOUSE GENES Enrichment function for up and down genes

#Runs an ordered query
fea <- function(genelist, topreturned){
  fea <- gost(genelist, organism = "mmusculus", ordered_query = FALSE, multi_query = FALSE, evcodes = TRUE, correction_method = "bonferroni", sources = c("GO:BP", "GO:MF", "GO:CC", "KEGG", "REAC", "MIRNA", "CORUM", "HP", "HPA", "WP"))
  #remove abitrary pathways -- any pathways with > 1000 genes
  fea <- fea$result %>% dplyr::filter(., term_size < 1000 & term_size > 10)
return(fea)
}

mouseenrich <- function(upgenes, downgenes, topreturned) {
  upgenes$LFC <- sort(as.numeric(upgenes$LFC), decreasing = TRUE)
  up_fea <- fea(upgenes$Mouse.gene.stable.ID, topreturned$Mouse.gene.stable.ID)
  up_fea <- dplyr::mutate(up_fea, .keep = "all", direction = "upregulated")
  
  downgenes$LFC <- sort(as.numeric(downgenes$LFC))
  down_fea <- fea(downgenes$Mouse.gene.stable.ID, topreturned$Mouse.gene.stable.ID)
  down_fea <- dplyr::mutate(down_fea, .keep = "all", direction = "downregulated")
  
  up_mat <- up_fea  %>% dplyr::arrange(., desc(recall)) %>% dplyr::slice_head(., n = topreturned)
  down_mat <- down_fea  %>% dplyr::arrange(., desc(recall)) %>% dplyr::slice_head(., n = topreturned)
  combined_fea <- rbind(up_mat, down_mat)
  fea_res <- list(downregulated = down_fea, upregulated = up_fea, topcompared = combined_fea)

return(fea_res)
}

Custom background for FEA (for enrichment using mouse to human to entrez to lincs conversion – background gene list is all possible measured genes that have gone through those conversions)

#Needs to only be possible genes -- all genes measured in salmon gene counts that map to human entrez
#mesgenes <- data@NAMES
#remove version numbers
#mesgenes <- gsub("\\..*", "", mesgenes)

#genebg <- convertMouseGeneList(mesgenes)
#write.csv2(genebg, file = "./data/background_genelist_genestoLINCSgenes.csv")

genebg <- read.csv(file = here("data", "background_genelist_genestoLINCSgenes.csv"))

bubbleplot, up and down for x axis and terms on y axis

bubbleplot <- function(combined_fea){
  bubbleplot <- ggplot(combined_fea, aes(x=direction, y=term_name, size = intersection_size, fill = p_value)) +
  geom_point(alpha=0.7, shape = 21) +
  scale_size(range = c(2, 10), name = "# Genes Matched to Term") + 
  scale_fill_distiller(palette = "Purples") + 
  labs(x = "Differential Expression Direction", y = "Functional Enrichment Terms") + 
  theme_minimal(base_size = 8) #+ labs(title = "Top Enriched Terms")
#  ggsave(filepath, bubbleplot, width = 10, height = 7)
return(bubbleplot)
}

barplot, up and down merged, colored by p-value, terms on y axis and # genes on x axis

barplot <- function(combined_fea){
  barplot <- ggplot(combined_fea, aes(x=intersection_size, y=term_name,  fill = recall)) + 
    geom_bar( stat = "identity") + 
    scale_fill_continuous(type = "viridis") + 
    labs(x = "Number of Genes Matched to Term", y = "Functional Terms") + 
    theme_minimal(base_size = 8)
return(barplot)
}

subsetting terms that are mitochondria dysfunction-related

subsetting terms that are ciliopathy-related terms

Subsetting for fibrosis-realted terms

immune terms Subsetting for fibrosis-realted terms

subsetting terms that are SKF-96365 related (a store-operated Ca2+ entry (SOCE) inhibitor)

### Load in (top 100 human) data
Load in top 100 up and down genes used as signatures for signatureSearch

’39 precystic p70

hom_degs_UP <- read.csv(here("res", "deseq2_outputs", "GSE149739_degs_UP_220421.csv"))

hom_degs_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE149739_degs_DOWN_220421.csv"))

’19 cystic p28

hom_19_UP <- read.csv(here("res", "deseq2_outputs", "GSE134719_degs_UP_220421.csv"))
hom_19_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE134719_degs_DOWN_220421.csv"))

’56 cystic p21

hom_56_UP <- read.csv(here("res", "deseq2_outputs", "GSE69556_degs_UP_220421.csv"))

hom_56_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE69556_degs_DOWN_220421.csv"))

Comparing results across datasets

“Hom” refers to homosapiens – genes that have been mapped from mouse to human ### Compare number of DEGs - datasets

FEA p70 precystic GSE149739 (top 100 human orthologs)

fea_nf_GSE149739 <- enrich(as.character(hom_degs_UP$entrezgene_id), as.character(hom_degs_DOWN$entrezgene_id), 30)

Barplot of overall top enrichment terms

bp_39 <- barplot(fea_nf_GSE149739$topcompared)
bp_39

Bubbleplot of cilia-related enriched terms

Bubbleplot of mitochondria-related enriched terms

SOCE-related pathways for ’19 and ’56 overlap

FEA p21 cystic GSE69556 (top 100 human orthologs)

fea_GSE69556 <- enrich(as.character(hom_56_UP$entrezgene_id), as.character(hom_56_DOWN$entrezgene_id), 30)

Barplot of overall top enrichment terms

bp_56 <- barplot(fea_GSE69556$topcompared)
bp_56

Bubbleplot of cilia-related enriched terms

Bubbleplot of mitochondria-related enriched terms

SOCE-related pathways for ’19 and ’56 overlap

FEA p28 cystic GSE134719 (top 100 human orthologs)

fea_GSE134719 <- enrich(as.character(hom_19_UP$entrezgene_id), as.character(hom_19_DOWN$entrezgene_id), 30)

Barplot of overall top enrichment terms

bar19 <- barplot(fea_GSE134719$topcompared)
bar19

Bubbleplot of cilia-related enriched terms

Bubbleplot of mito-related enriched terms

SOCE-related pathways for ’19 and ’56 overlap

Enrichment comparisons

Save

Mouse genes FEA

Using ALL mouse DEGs (0.05 alpha and -2 and 2 LFC cutoff) before mapping to human genes or selecting top 100 available in LINCS

Load in (mouse DEG) data

’39 nf-core aligned

musdegs_39_UP <- read.csv(here("res", "deseq2_outputs", "GSE149739_mus_degs_UP_220421.csv"))

musdegs_39_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE149739_mus_degs_DOWN_220421.csv"))

’19

musdegs_19_UP <- read.csv(here("res", "deseq2_outputs", "GSE134719_mus_degs_UP_220421.csv"))
musdegs_19_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE134719_mus_degs_DOWN_220421.csv"))

’56

musdegs_56_UP <- read.csv(here("res", "deseq2_outputs", "GSE69556_mus_degs_UP_220421.csv"))

musdegs_56_DOWN <- read.csv(here("res", "deseq2_outputs", "GSE69556_mus_degs_DOWN_220421.csv"))

Gene overlaps

degs_19_56_overlap_up <- merge(musdegs_19_UP, musdegs_56_UP, by = "Mouse.gene.stable.ID")

degs_19_56_overlap_down <- merge(musdegs_19_DOWN, musdegs_56_DOWN, by = "Mouse.gene.stable.ID")

degs_19_56_39_overlap_up <- merge(degs_19_56_overlap_up, musdegs_39_UP, by = "Mouse.gene.stable.ID")

degs_19_56_39_overlap_down <- merge(degs_19_56_overlap_down, musdegs_39_DOWN, by = "Mouse.gene.stable.ID")

degs_39_56_overlap_up <- merge(musdegs_39_UP, musdegs_56_UP, by = "Mouse.gene.stable.ID")

degs_39_56_overlap_down <- merge(musdegs_39_DOWN, musdegs_56_DOWN, by = "Mouse.gene.stable.ID")


degs_39_19_overlap_up <- merge(musdegs_39_UP, musdegs_19_UP, by = "Mouse.gene.stable.ID")

degs_39_19_overlap_down <- merge(musdegs_39_DOWN, musdegs_19_DOWN, by = "Mouse.gene.stable.ID")

FEA p70 precystic ’39 Mouse

fea_mouse_39 <- mouseenrich(musdegs_39_UP, musdegs_39_DOWN, 30)

bp_mouse_39_overall <- barplot(dplyr::filter(fea_mouse_39$topcompared, intersection_size > 10)) + ggtitle("Pre-Cystic Top Upregulated ") 
bp_mouse_39_overall

fea_mouse_39$topcompared$term_name <- stringr::str_wrap(fea_mouse_39$topcompared$term_name, width = 60)

p70_fea_plot <- ggplot(dplyr::filter(fea_mouse_39$topcompared, intersection_size > 10), aes(x=intersection_size, y=term_name,  fill = recall)) + 
    geom_bar( stat = "identity") + 
    scale_fill_continuous(type = "viridis") + 
    labs(x = "Number of Genes Matched to Term", y = "Functional Terms", title = "Pre-Cystic Top Upregulated Enrichment") + 
    theme(text = element_text(size = 14), plot.title = element_text(size = 24))# +
    #theme_minimal(base_size = 8)
p70_fea_plot

bubbleplot

bubbleplot_p70 <- bubbleplot(dplyr::filter(fea_mouse_39$topcompared, intersection_size > 6)) + labs(title = "Top Enriched Terms for Pre-Cystic P70 Dataset") + theme(text = element_text(size = 14), plot.title = element_text(size = 24))

bubbleplot(fea_mouse_39$topcompared) + labs(title = "Top Enriched Terms for Pre-Cystic P70 Dataset") + theme(text = element_text(size = 14), plot.title = element_text(size = 24))

ggsave(p70_fea_plot, filename = here("res", "fea", "p70_fea_plot.pdf"))
## Saving 7 x 5 in image
ggsave(bubbleplot_p70, filename = here("res", "fea", "p70_fea_bubbleplot.pdf"), height = 7, width = 14)

bubbleplot top

bubbleplot(dplyr::filter(fea_mouse_39$topcompared, intersection_size > 10)) #+ labs(title = "")

Cilia-related pathways

Mitochondria-related pathways

Store-operated CA 2+ Entry pathways

Fibrosis-related terms

Immune-related terms

Save plots

ggsave(filename = here("res", "fea", "mouse_39_overall.pdf"), bp_mouse_39_overall)
## Saving 7 x 5 in image

FEA p21 cystic ’56 Mouse

fea_mouse_56 <- mouseenrich(musdegs_56_UP, musdegs_56_DOWN, 30)

bp_mouse_56_overall <- barplot(dplyr::filter(fea_mouse_56$topcompared, intersection_size > 10))
bp_mouse_56_overall

bubbleplot

fea_mouse_56$topcompared$term_name <- stringr::str_wrap(fea_mouse_56$topcompared$term_name, width = 70)


bubbleplot_p21 <- bubbleplot(dplyr::filter(fea_mouse_56$topcompared, intersection_size > 10)) + labs(title = "Top Enriched Terms for Cystic P21 Dataset") + theme(text = element_text(size = 14), plot.title = element_text(size = 24))
ggsave(bubbleplot_p21, filename = here("res", "fea", "p21_fea_bubbleplot.pdf"), height = 7, width = 14)

Cilia-related pathways

Mitochondria-related pathways

Fibrosis-related terms

Immune-related terms

Store-operated CA 2+ Entry pathways

Save plots

ggsave(filename = here("res", "fea", "mouse_56_overall.pdf"), bp_mouse_56_overall)
## Saving 7 x 5 in image

FEA p28 cystic ’19 Mouse

fea_mouse_19 <- mouseenrich(musdegs_19_UP, musdegs_19_DOWN, 40)

bp_mouse_19 <- barplot(fea_mouse_19$topcompared)
bp_mouse_19

bubbleplot

fea_mouse_19$topcompared$term_name <- stringr::str_wrap(fea_mouse_19$topcompared$term_name, width = 70)


bubbleplot_p28 <- bubbleplot(dplyr::filter(fea_mouse_19$topcompared, intersection_size > 10)) + labs(title = "Top Enriched Terms for Cystic P28 Dataset") + theme(text = element_text(size = 14), plot.title = element_text(size = 24))
ggsave(bubbleplot_p28, filename = here("res", "fea", "p28_fea_bubbleplot.pdf"), height = 7, width = 14)

Cilia-related pathways

Mitochondria-related pathways

Fibrosis-related terms

Immune-related terms

Store-operated CA 2+ Entry pathways

Save gprofiler results

Save plots

ggsave(filename = here("res", "fea", "mouse_19_overall.pdf"), bp_mouse_19)
## Saving 7 x 5 in image

Cystic Overlap (’19 and ’56)

read in

fea_56_up <- read.csv(here("res", "fea", "fea_mouse_56_upterms.csv"))

fea_56_down <- read.csv(here("res", "fea", "fea_mouse_56_downterms.csv"))

fea_19_up <- read.csv(here("res", "fea", "fea_mouse_19_upterms.csv"))

fea_19_down <- read.csv(here("res", "fea", "fea_mouse_19_downterms.csv"))

fea_39_up <- read.csv(here("res", "fea", "fea_mouse_39_upterms.csv"))

fea_39_down <- read.csv(here("res", "fea", "fea_mouse_39_downterms.csv"))

merge for overlap

fea_cystic_overlap_up <- merge(fea_56_up, fea_19_up, by = "term_name")

fea_cystic_overlap_down <- merge(fea_56_down, fea_19_down, by = "term_name")

merge for overlap across all datasets

fea_all_overlap_up <- merge(fea_cystic_overlap_up, fea_39_up, by = "term_name")

fea_all_overlap_down <- merge(fea_cystic_overlap_down, fea_39_down, by = "term_name")

cystic up overlap

ggplot(slice_max(fea_cystic_overlap_up, order_by = recall.y, n = 30), aes(x=intersection_size.y, y=term_name,  fill = p_value.y)) + 
    geom_bar( stat = "identity") + 
    scale_fill_continuous(type = "viridis") + 
    labs(x = "Number of Genes Matched to Term", y = "Functional Terms", title = "Top Enriched Terms") + 
    theme_minimal(base_size = 8)

cystic down overlap

ggplot(slice_max(fea_cystic_overlap_down, order_by = recall.y, n = 30), aes(x=intersection_size.y, y=term_name,  fill = p_value.y)) + 
    geom_bar( stat = "identity") + 
    scale_fill_continuous(type = "viridis") + 
    labs(x = "Number of Genes Matched to Term", y = "Functional Terms", title = "Top Enriched Terms") + 
    theme_minimal(base_size = 8)

fea_cystic_overlap_combined <- rbind(fea_cystic_overlap_up, fea_cystic_overlap_down)

fea_cystic_overlap_combined <- fea_cystic_overlap_combined %>% rowwise() %>% mutate(total_genes = sum(intersection_size.x, intersection_size.y))
ggplot(slice_max(fea_cystic_overlap_combined, order_by = recall.x, n = 10), aes(x=direction.x, y=term_name, size = total_genes, fill = p_value.y)) +
  geom_point(alpha=0.7, shape = 21) +
  scale_size(range = c(2, 10), name = "# Genes Matched to Term") + 
  scale_fill_distiller(palette = "Purples") + 
  labs(x = "Differential Expression Direction", y = "Functional Enrichment Terms") + 
  theme_minimal(base_size = 8)

write.csv(fea_cystic_overlap_up, file = here("res", "fea", "fea_cystic_overlap_up.csv"))

write.csv(fea_cystic_overlap_down, file = here("res", "fea", "fea_cystic_overlap_down.csv"))

write.csv(fea_cystic_overlap_combined, file = here("res", "fea", "fea_cystic_overlap_combined.csv"))

Compare Mouse FEA

Term overlap

fea_overlap_19_56_up <- merge(fea_mouse_19$upregulated, fea_mouse_56$upregulated, by = "term_name")
fea_overlap_19_56_down <- merge(fea_mouse_19$downregulated, fea_mouse_56$downregulated, by = "term_name")

#fea_overlap_19_56 <- list(upregulated = fea_overlap_19_56_up, downregulated = fea_overlap_19_56_down)

fea_overap_all_up <- merge(fea_mouse_39$upregulated, fea_overlap_19_56_up, by = "term_name")
fea_overap_all_down <- merge(fea_mouse_39$downregulated, fea_overlap_19_56_down, by = "term_name")

Save

fea_overlap_19_56_upterms <- apply(fea_overlap_19_56_up, 2, as.character)
write.csv(fea_overlap_19_56_upterms, file = here("res", "fea", "fea_overlap_19_56_upterms.csv"))

fea_overlap_19_56_downterms <- apply(fea_overlap_19_56_down, 2, as.character)
write.csv(fea_overlap_19_56_downterms, file = here("res", "fea", "fea_overlap_19_56_downterms.csv"))

fea_overap_all_upterms <- apply(fea_overap_all_up, 2, as.character)
write.csv(fea_overap_all_upterms, file = here("res", "fea", "fea_overap_all_upterms.csv"))

fea_overap_all_downterms <- apply(fea_overap_all_down, 2, as.character)
write.csv(fea_overap_all_downterms, file = here("res", "fea", "fea_overap_all_downterms.csv"))

Mitochondria-related terms overlap

Versions

R.Version()
## $platform
## [1] "x86_64-apple-darwin17.0"
## 
## $arch
## [1] "x86_64"
## 
## $os
## [1] "darwin17.0"
## 
## $system
## [1] "x86_64, darwin17.0"
## 
## $status
## [1] ""
## 
## $major
## [1] "4"
## 
## $minor
## [1] "2.0"
## 
## $year
## [1] "2022"
## 
## $month
## [1] "04"
## 
## $day
## [1] "22"
## 
## $`svn rev`
## [1] "82229"
## 
## $language
## [1] "R"
## 
## $version.string
## [1] "R version 4.2.0 (2022-04-22)"
## 
## $nickname
## [1] "Vigorous Calisthenics"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
##                 here                dplyr              ggplot2 
##              "1.0.1"              "1.0.9"              "3.3.6" 
##      signatureSearch SummarizedExperiment              Biobase 
##             "1.10.0"             "1.26.1"             "2.56.0" 
##        GenomicRanges         GenomeInfoDb              IRanges 
##             "1.48.0"             "1.32.2"             "2.30.0" 
##            S4Vectors         BiocGenerics       MatrixGenerics 
##             "0.34.0"             "0.42.0"              "1.8.1" 
##          matrixStats                 Rcpp              stringr 
##             "0.62.0"              "1.0.9"              "1.4.0" 
##              viridis          viridisLite           gprofiler2 
##              "0.6.2"              "0.4.0"              "0.2.1"